Leitura de arquivos individuais

Business

Clusterização dos estabelecimentos por semelhança de atributos

yelp_bz <- yelp_bz_raw %>% 
          select_if(~is.numeric(.)) %>% 
          mutate_all(~replace(., is.na(.), 0))

glimpse(yelp_bz)
## Rows: 14,962
## Columns: 34
## $ latitude                   <dbl> 43.62661, 43.64041, 43.61129, 43.70441, 43.6709…
## $ longitude                  <dbl> -79.50209, -79.39058, -79.55687, -79.37511, -79…
## $ review_count               <dbl> 4, 81, 3, 3, 4, 6, 10, 52, 14, 4, 4, 11, 7, 3, …
## $ stars                      <dbl> 2.0, 2.5, 1.0, 5.0, 3.0, 4.5, 3.0, 2.5, 3.5, 2.…
## $ AcceptsInsurance           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ AgesAllowed                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ Alcohol                    <dbl> 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,…
## $ BYOB                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ BikeParking                <dbl> 2, 1, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2, 2, 0, 1, 0,…
## $ BusinessAcceptsCreditCards <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,…
## $ ByAppointmentOnly          <dbl> 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ Caters                     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,…
## $ CoatCheck                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ Corkage                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,…
## $ DogsAllowed                <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ DriveThru                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ GoodForDancing             <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ GoodForKids                <dbl> 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 2, 0,…
## $ HappyHour                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ HasTV                      <dbl> 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 2,…
## $ NoiseLevel                 <dbl> 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,…
## $ OutdoorSeating             <dbl> 0, 2, 0, 0, 0, 0, 2, 1, 2, 0, 0, 2, 1, 1, 1, 0,…
## $ RestaurantsAttire          <dbl> 0, 3, 0, 0, 0, 0, 3, 3, 3, 0, 0, 3, 3, 0, 0, 0,…
## $ RestaurantsDelivery        <dbl> 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 2, 0,…
## $ RestaurantsGoodForGroups   <dbl> 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 1, 0,…
## $ RestaurantsPriceRange2     <dbl> 2, 2, 0, 0, 2, 0, 2, 1, 2, 0, 0, 1, 2, 0, 2, 0,…
## $ RestaurantsReservations    <dbl> 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 1, 0,…
## $ RestaurantsTableService    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,…
## $ RestaurantsTakeOut         <dbl> 0, 2, 0, 0, 0, 0, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2,…
## $ Smoking                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ WheelchairAccessible       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0,…
## $ WiFi                       <dbl> 0, 3, 0, 0, 0, 0, 3, 3, 3, 0, 0, 3, 3, 0, 0, 0,…
## $ tips_counter_bz            <dbl> 0, 14, 0, 0, 0, 1, 4, 5, 5, 0, 0, 6, 0, 1, 3, 3…
## $ total_compliments_bz       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…

PCA

Será aplicada uma análise de componentes principais para entender a variabilidade da nota dos estabelecimentos considerando seus atributos.

rec_pca <- recipe(stars ~ ., yelp_bz) %>% 
  update_role(contains('id'), new_role = 'id') %>% 
  #step_date(date_rv, yelping_since_usr, features = c("dow", "month","year")) %>% 
  #step_other(categories, threshold = 0.005) %>% 
  #step_other(postal_code, threshold = 0.01) %>% 
  #step_dummy(all_nominal(), -'business_id',-'user_id',-'name_bz') %>%
  step_normalize(all_numeric(), -all_outcomes()) %>% 
  step_pca(all_numeric(), -all_outcomes()) %>% 
  step_naomit(all_numeric()) %>% 
  prep()

yelp_bz_pca <- juice(rec_pca)

Scree plot

variance_pct <- rec_pca$steps[[2]]$res

(cumsum(variance_pct$sdev^2) / sum(variance_pct$sdev^2))
##  [1] 0.2795283 0.3569530 0.4181779 0.4760438 0.5246121 0.5650128 0.6019585 0.6384455
##  [9] 0.6714870 0.7020952 0.7307504 0.7590915 0.7806310 0.8013364 0.8214466 0.8408425
## [17] 0.8588443 0.8747742 0.8894284 0.9032762 0.9169327 0.9301989 0.9408237 0.9507912
## [25] 0.9598340 0.9678033 0.9751966 0.9818828 0.9879589 0.9930369 0.9974065 1.0000000
## [33] 1.0000000
fviz_eig(variance_pct, addlabels = TRUE) + 
  labs(x = "Componente Principal",
       y = "Percentual explicado da variância")

Mais de 50% da variabilidade é explicada pelas 5 primeiras componentes, que são compostas da seguinte forma:

Drivers

tidy_pca <- tidy(rec_pca, 2)

tidy_pca %>%
  filter(component %in% paste0("PC", 1:6)) %>%
  group_by(component) %>%
  top_n(15, abs(value)) %>%
  ungroup() %>%
  mutate(terms = reorder_within(terms, abs(value), component)) %>%
  ggplot(aes(abs(value), terms, fill = value > 0)) +
  geom_col() +
  facet_wrap(~component, scales = "free_y") +
  scale_y_reordered() +
  labs(
    x = "Valor absoluto da contribuição",
    y = NULL, fill = "Valor > 0")

Na primeira componente, os pesos são igualmente distribuídos, o que indica que todos os atributos tem impacto semelhante na maior parte da variabilidade.

Pela segunda componente, no entanto, observa-se que a existência de um local para deixar o casaco, ser permitido fumar e ser um bom local para dançar são mais relevante, assim como a localização (PC6). Além disso, cobrança de rolha e necessidade de levar a bebida também são drivers importantes, pois aparecem em mais de uma componente.

Contrastes

variance_pct %>% 
  fviz_pca_var(axes = c(1,2), col.var="contrib", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"))

Os maiores contrastes são entre a modalidade de atendimento dos restaurante: Apenas delivery ou com reservas.

Users

Classificação dos usuários conforme o perfil.

K-Means

Será utilizada a clusterização k-médias por conta da quantidade de características dos usários presentes na base. Também foi tentada a aplicação de uma clusterização hierárquica, mas os resultados obtidos não foram tão interpretáveis como os seguintes.

set.seed(123)

glimpse(yelp_users)
## Rows: 119,792
## Columns: 23
## $ user_id            <chr> "-4Anvj46CWf57KWI9UQDLg", "-BUamlG3H-7yqpAl1p-msw", "-C…
## $ average_stars      <dbl> 3.50, 1.50, 3.00, 3.56, 3.00, 4.00, 4.17, 3.57, 4.48, 4…
## $ compliment_cool    <dbl> 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_cute    <dbl> 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_funny   <dbl> 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_hot     <dbl> 0, 0, 0, 0, 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, …
## $ compliment_list    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_more    <dbl> 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_note    <dbl> 0, 0, 1, 0, 0, 0, 0, 16, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ compliment_photos  <dbl> 0, 0, 0, 0, 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_plain   <dbl> 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, …
## $ compliment_profile <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_writer  <dbl> 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cool               <dbl> 2, 0, 1, 0, 1, 0, 0, 1562, 2, 1, 1, 9, 0, 5, 0, 9, 0, 0…
## $ elite_count        <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ fans               <dbl> 1, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ friends_count      <dbl> 1, 16, 15, 27, 1, 1, 1, 338, 59, 6, 10, 100, 8, 1, 79, …
## $ funny              <dbl> 0, 0, 1, 0, 0, 0, 0, 1266, 3, 1, 4, 0, 1, 1, 1, 5, 0, 0…
## $ review_count_usr   <dbl> 2, 2, 4, 27, 2, 6, 6, 66, 28, 3, 8, 37, 4, 20, 1, 18, 5…
## $ useful             <dbl> 2, 0, 1, 5, 1, 3, 16, 1683, 12, 1, 2, 30, 4, 30, 0, 19,…
## $ year_since         <dbl> 2016, 2016, 2011, 2019, 2014, 2017, 2014, 2019, 2014, 2…
## $ tips_counter       <dbl> 0, 1, 0, 0, 0, 1, 0, 0, 0, 19, 0, 0, 0, 0, 0, 2, 0, 0, …
## $ total_compliments  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
yelp_pad <- yelp_users %>% 
              select(-user_id) %>% 
              scale()

kclusts <- tibble(k = 1:30) %>%
  mutate(kclust = map(k, ~kmeans(yelp_pad, .x)),
        tidied = map(kclust, tidy),
        glanced = map(kclust, glance),
        augmented = map(kclust, augment, yelp_pad)
        )

clusters <- kclusts %>%
  unnest(cols = c(tidied))

assignments <- kclusts %>% 
  unnest(cols = c(augmented))

clusterings <- kclusts %>%
  unnest(cols = c(glanced))
### Cotovelo

clusterings %>% 
  ggplot(aes(k, tot.withinss)) + 
    geom_point(size = 3) + 
    geom_line() + 
    labs(y = "total within sum of squares", x = "k") +
    scale_x_continuous(breaks = 1:30)

Pelo gráfico do cotovelo, poderiam ser selecionado um número de clusters (k) de 11 a 17, a seguir é possível ver uma comparacão em relação às dicas.

#k-means
assignments %>% 
  filter(k %in% paste0(10:20)) %>%
  ggplot(aes(x = tips_counter, y = total_compliments)) +
  geom_point(aes(color = .cluster), alpha = 0.5) + 
  facet_wrap(~ k, nrow = 3)

Para a classificação final dos usuário, será feita novamente a clusterização, mas considerando apenas o k ideal.

set.seed(123)
kmeans_usr <-  kmeans(yelp_pad, 11)

yelp_usr_cluster <- yelp_users %>% 
          mutate(cluster_usr = kmeans_usr$cluster)

glimpse(yelp_usr_cluster)
## Rows: 119,792
## Columns: 24
## $ user_id            <chr> "-4Anvj46CWf57KWI9UQDLg", "-BUamlG3H-7yqpAl1p-msw", "-C…
## $ average_stars      <dbl> 3.50, 1.50, 3.00, 3.56, 3.00, 4.00, 4.17, 3.57, 4.48, 4…
## $ compliment_cool    <dbl> 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_cute    <dbl> 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_funny   <dbl> 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_hot     <dbl> 0, 0, 0, 0, 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, …
## $ compliment_list    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_more    <dbl> 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_note    <dbl> 0, 0, 1, 0, 0, 0, 0, 16, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ compliment_photos  <dbl> 0, 0, 0, 0, 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_plain   <dbl> 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, …
## $ compliment_profile <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_writer  <dbl> 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cool               <dbl> 2, 0, 1, 0, 1, 0, 0, 1562, 2, 1, 1, 9, 0, 5, 0, 9, 0, 0…
## $ elite_count        <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ fans               <dbl> 1, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ friends_count      <dbl> 1, 16, 15, 27, 1, 1, 1, 338, 59, 6, 10, 100, 8, 1, 79, …
## $ funny              <dbl> 0, 0, 1, 0, 0, 0, 0, 1266, 3, 1, 4, 0, 1, 1, 1, 5, 0, 0…
## $ review_count_usr   <dbl> 2, 2, 4, 27, 2, 6, 6, 66, 28, 3, 8, 37, 4, 20, 1, 18, 5…
## $ useful             <dbl> 2, 0, 1, 5, 1, 3, 16, 1683, 12, 1, 2, 30, 4, 30, 0, 19,…
## $ year_since         <dbl> 2016, 2016, 2011, 2019, 2014, 2017, 2014, 2019, 2014, 2…
## $ tips_counter       <dbl> 0, 1, 0, 0, 0, 1, 0, 0, 0, 19, 0, 0, 0, 0, 0, 2, 0, 0, …
## $ total_compliments  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cluster_usr        <int> 3, 7, 10, 9, 3, 9, 4, 9, 4, 10, 3, 10, 4, 3, 7, 9, 3, 1…

Pelo gráfico, observa-se claramente a divisão dos usuários em relação ao tempo na plataforma, a nota média e a quantidade de fãs.

plot_ly(yelp_usr_cluster, x = ~year_since, 
               y = ~average_stars,
               z = ~fans, color = ~cluster_usr,
              text = ~paste('Cluster: ', cluster_usr)) %>% 
  add_markers() %>% 
  layout(scene = list(xaxis = list(title = 'No Yelp desde'),
                                   yaxis = list(title = 'Nota Média'),
                                   zaxis = list(title = 'Quantidade de fãs')))
yelp_usr_cluster %>% 
          select(user_id, cluster_usr) %>%
          write.csv(file = "output/usr_cluster.csv")

Como próximos passos, seria interessante entnder melhor as características de cada cluster. Para classificar usuário que não estão na base, será utilizado um modelo de árvore para fazer a classificação. O intuito é obter de uma forma rápida o cluster de um novo usuário.

Modelo para definição do cluster do usuário

user_cluster_tree <- yelp_usr_cluster %>% 
                    select(-user_id) %>% 
                    rpart(cluster_usr ~ ., data = .)

plot_arvore <- as.party(user_cluster_tree)

#plot(plot_arvore)

Rede Neural

Leitura da base final

yelp_rv <- yelp_raw %>% 
  #mutate(line = row_number()) %>% 
  select(-'year_rv') %>% 
  mutate(stars_rv = replace(stars_rv >= 4,1,0)) %>% 
  select_if(is.numeric) #%>% sample_frac(0.50)

glimpse(yelp_rv)
## Rows: 219,462
## Columns: 58
## $ average_stars              <dbl> 1.00, 3.21, 1.50, 3.99, 4.23, 3.64, 3.67, 3.91,…
## $ compliment_cool            <dbl> 0, 0, 0, 4, 2, 1, 0, 3, 0, 0, 0, 0, 1, 0, 0, 4,…
## $ compliment_cute            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_funny           <dbl> 0, 0, 0, 4, 2, 1, 0, 3, 0, 0, 0, 0, 1, 0, 0, 4,…
## $ compliment_hot             <dbl> 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 3,…
## $ compliment_list            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_more            <dbl> 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 1,…
## $ compliment_note            <dbl> 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,…
## $ compliment_photos          <dbl> 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_plain           <dbl> 0, 0, 0, 1, 2, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 10…
## $ compliment_profile         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_writer          <dbl> 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 1,…
## $ cool                       <dbl> 0, 2, 0, 35, 45, 1, 0, 37, 0, 0, 17, 0, 8, 0, 0…
## $ elite_count                <dbl> 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ fans                       <dbl> 0, 0, 0, 1, 7, 0, 0, 9, 0, 0, 4, 0, 1, 0, 0, 2,…
## $ friends_count              <dbl> 12, 1, 1, 44, 31, 8, 1, 147, 1, 20, 34, 1, 3, 1…
## $ funny                      <dbl> 0, 1, 0, 18, 27, 0, 1, 20, 0, 0, 2, 0, 9, 0, 0,…
## $ review_count_usr           <dbl> 1, 19, 2, 105, 279, 14, 3, 190, 2, 4, 38, 1, 13…
## $ useful                     <dbl> 0, 1, 1, 90, 99, 1, 1, 100, 1, 1, 19, 1, 53, 0,…
## $ year_since                 <dbl> 2017, 2016, 2017, 2011, 2017, 2017, 2015, 2014,…
## $ tips_counter               <dbl> 0, 0, 0, 1, 1, 5, 1, 9, 0, 1, 0, 0, 5, 0, 0, 16…
## $ total_compliments          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ cluster_usr                <dbl> 7, 13, 7, 11, 12, 13, 3, 12, 13, 7, 9, 7, 3, 13…
## $ stars_rv                   <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,…
## $ latitude                   <dbl> 43.64041, 43.64041, 43.64041, 43.64041, 43.6404…
## $ longitude                  <dbl> -79.39058, -79.39058, -79.39058, -79.39058, -79…
## $ review_count               <dbl> 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81,…
## $ stars                      <dbl> 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.…
## $ AcceptsInsurance           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ AgesAllowed                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ Alcohol                    <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ BYOB                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ BikeParking                <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ BusinessAcceptsCreditCards <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ ByAppointmentOnly          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ Caters                     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ CoatCheck                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ Corkage                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ DogsAllowed                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ DriveThru                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ GoodForDancing             <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ GoodForKids                <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ HappyHour                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ HasTV                      <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ NoiseLevel                 <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,…
## $ OutdoorSeating             <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ RestaurantsAttire          <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ RestaurantsDelivery        <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ RestaurantsGoodForGroups   <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ RestaurantsPriceRange2     <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ RestaurantsReservations    <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ RestaurantsTableService    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ RestaurantsTakeOut         <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ Smoking                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ WheelchairAccessible       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ WiFi                       <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ tips_counter_bz            <dbl> 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,…
## $ total_compliments_bz       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…

Bases de Treino e Teste

split <- initial_split(yelp_rv, prop = 0.8 , strata = stars_rv)

train_val <- training(split)


split_val <- initial_split(train_val, prop = 0.5, strata = stars_rv)

yelp_train <- training(split_val)
yelp_val <- testing(split_val)
yelp_test <- testing(split)
  • Normalização pela média e desvio padrão da base teste
mean <- yelp_train %>% 
        select(-stars_rv) %>% 
        apply(., 2, mean) 

std <- yelp_train %>% 
        select(-stars_rv) %>% 
        apply(., 2, sd)
x_train <- yelp_train %>% 
            select(-stars_rv) %>% 
            scale(center = mean, scale = std) %>% 
            as.matrix()

dim(x_train)
## [1] 87786    57
y_train <- yelp_train %>% 
            select(stars_rv) %>% 
            as.matrix()

x_val <-  yelp_val %>% 
            select(-stars_rv) %>% 
            scale(center = mean, scale = std) %>% 
            as.matrix()

dim(x_val)
## [1] 87785    57
y_val <- yelp_val %>% 
            select(stars_rv) %>% 
            data.matrix()

dim(x_val)
## [1] 87785    57
x_test <- yelp_test %>% 
          select(-stars_rv) %>% 
          scale(center = mean, scale = std) %>% 
          as.matrix()

dim(x_test) 
## [1] 43891    57
y_test <- yelp_test %>% 
            select(stars_rv) %>% 
            data.matrix()

Modelo

rm(yelp_nn)

yelp_nn <- keras_model_sequential() %>% 
  layer_dense(units = 30, activation = "tanh", input_shape = ncol(x_train)) %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 16, activation = "relu") %>%
  #layer_dropout(rate = 0.5) %>%
  layer_dense(units = 16, activation = "relu") %>%
  #layer_dense(units = 6, activation = "softmax")
  layer_dense(units = 1, activation = "sigmoid")

yelp_nn %>% 
  compile(optimizer = "rmsprop", 
          #loss = "sparse_categorical_crossentropy", 
          loss = "binary_crossentropy",
          metrics = c("accuracy"))


history <- yelp_nn %>% 
  fit(x_train, y_train, 
      epochs = 40, batch_size = 512, 
      validation_data = list(x_val, y_val))

plot(history)
## `geom_smooth()` using formula 'y ~ x'

keras::get_weights(yelp_nn)
## [[1]]
##                [,1]          [,2]          [,3]          [,4]         [,5]
##  [1,]  0.6094545722 -0.4749614298  0.3541086018 -0.4428369701 -0.655121207
##  [2,] -0.0394935459 -0.0203993767 -0.1796677858  0.0410629697  0.235627651
##  [3,]  0.1150243357 -0.1785386056  0.0938132852  0.2151044756 -0.203980863
##  [4,]  0.1204489470  0.1927195936  0.1893769354  0.2431167513  0.237795889
##  [5,] -0.1355855465  0.1677446961  0.1107519418 -0.0383688547  0.097957835
##  [6,] -0.1350986511 -0.0802458376  0.1391153187 -0.2267953604  0.281118870
##  [7,] -0.1834388673 -0.1196382940  0.1005086154 -0.0146540077 -0.133926347
##  [8,] -0.0952792019  0.1544742584 -0.2102304250 -0.2317446619  0.097554296
##  [9,] -0.1308181882 -0.0727197677 -0.0194583517 -0.1283243895 -0.063994192
## [10,]  0.1468087435 -0.2453448027  0.1376343518  0.1514547914 -0.336096257
## [11,]  0.1967902184  0.1673227400  0.1810142547  0.2003618628  0.094515443
## [12,] -0.0111451307 -0.1536421180  0.1011863351 -0.2090851218  0.189533710
## [13,]  0.0747257397 -0.0180378761  0.1988334507  0.0174768157  0.057531830
## [14,] -0.0511272773 -0.0033279194  0.0958851576 -0.0243877731 -0.160894617
## [15,] -0.1030690670  0.1699993312 -0.1041736081  0.1925117522  0.061366778
## [16,] -0.0002413686 -0.0200763997  0.0559247844 -0.0111677572 -0.022314997
## [17,]  0.1810127795  0.0756299868  0.0665282831 -0.0539779700 -0.057382375
## [18,]  0.0592285581  0.0620267503 -0.2536863089 -0.0653754324  0.197270438
## [19,] -0.0230310708 -0.0228785779  0.1963366419 -0.0242696255 -0.163470387
## [20,] -0.0660066232  0.0107743675  0.0233811066 -0.0547488891 -0.063670598
## [21,] -0.0003246044 -0.0106878700  0.0441279933 -0.0174635574 -0.167306900
## [22,]  0.1243580952  0.0525388010 -0.1667653620  0.0599543974  0.213982433
## [23,] -0.0364245698  0.0085796192  0.0120518366 -0.0001108671  0.019147670
## [24,]  0.0449368320 -0.0372285992  0.0021126377 -0.0032062093 -0.031016760
## [25,] -0.0257117711  0.0339759737  0.0275775362  0.0202764496  0.015631739
## [26,]  0.0435284376  0.0673498660  0.2094453722  0.0074568819 -0.118998326
## [27,]  0.3811446726 -0.6182153821  0.6504760981 -0.1554894447 -0.336833626
## [28,]  0.0901296288 -0.0315203555  0.0722742900 -0.0354919881  0.006983003
## [29,] -0.0115256310  0.0609332919 -0.0029702515  0.0420379825 -0.361402273
## [30,]  0.0116669172  0.0189346205 -0.1175204739 -0.0113584073  0.029726485
## [31,] -0.0100175468 -0.0637853891  0.0558892600  0.0198947191 -0.187102616
## [32,]  0.0195637234  0.0504271016  0.0308048837  0.0322431400  0.025857905
## [33,]  0.0187816266  0.0051208157  0.0070411614  0.0050138412  0.037859101
##                [,6]         [,7]          [,8]         [,9]         [,10]
##  [1,]  0.4371570349 -0.549427748 -3.868998e-01  0.405837446  0.3706296086
##  [2,]  0.0523206964 -0.089967407  9.482827e-02 -0.194767714 -0.0493119918
##  [3,] -0.2296333015  0.208597079 -1.781916e-01  0.196854204 -0.3198908865
##  [4,] -0.2835386395  0.123967148  1.170011e-01  0.013133104  0.2885253727
##  [5,] -0.1260014921 -0.252608389 -1.375182e-01 -0.209502280 -0.1215947941
##  [6,] -0.1164530739 -0.041656960  1.781321e-01  0.021474242 -0.3044258952
##  [7,]  0.2701379359  0.045553014 -1.200169e-01 -0.045938756  0.1225088462
##  [8,] -0.0801308826  0.072096616  1.932938e-02  0.120933078 -0.0132030519
##  [9,] -0.3159334660 -0.242813870  8.762016e-02  0.038546912 -0.1922984868
## [10,] -0.1963677555 -0.075676203 -1.673460e-01  0.007502505 -0.0474456102
## [11,]  0.2045760155  0.176992431 -8.338238e-02 -0.081978336 -0.2257487625
## [12,] -0.2124243081  0.233003959 -9.562983e-02 -0.025574611 -0.2030904591
## [13,]  0.1450067908  0.049171355  1.562534e-02  0.145802513 -0.0742091089
## [14,]  0.0481342450  0.373851746  3.702985e-02 -0.002289793 -0.0307355747
## [15,] -0.1546295434  0.081347331 -5.477034e-04 -0.044991374 -0.0458727926
## [16,]  0.0293789580  0.364131451  4.198668e-02 -0.069224298 -0.1014279425
## [17,] -0.0094344169  0.139708593 -3.092309e-02 -0.140755132  0.4060024917
## [18,]  0.0259873960 -0.241443291 -2.368002e-02 -0.004487220  0.1961882114
## [19,] -0.0006819399  0.254184008  1.072929e-01  0.155561551  0.0436611250
## [20,]  0.0264473986 -0.049973533  3.007169e-02 -0.098555148 -0.0347826928
## [21,]  0.0748036727 -0.066090897 -6.072568e-03 -0.033720732  0.1098008752
## [22,]  0.0226566643 -0.326154292  2.673029e-02 -0.090960182 -0.2199163139
## [23,]  0.0311658382  0.015701128 -1.953179e-02  0.110052265  0.0807325244
## [24,] -0.0046645310 -0.021014942 -2.701281e-02 -0.017125441  0.0196815114
## [25,]  0.0233575180 -0.029191423  3.468003e-02 -0.023488579 -0.0293543097
## [26,] -0.0615089722  0.070390500  7.595991e-02 -0.054819535 -0.0838055238
## [27,]  0.5328783989 -0.398405194 -2.506301e-01  0.243116111  0.1920093447
## [28,]  0.0560581051 -0.012278790  1.283942e-02 -0.025038337 -0.0206865184
## [29,] -0.1921747625 -0.184246510  1.339100e-02 -0.015998797 -0.0256808102
## [30,]  0.0579077415 -0.182403922 -3.786369e-02  0.114237718  0.1584116817
## [31,]  0.0576090589 -0.129636660  3.967950e-03 -0.031206505 -0.2018909156
## [32,]  0.0105164815  0.003837890  4.117323e-02 -0.003388634  0.0009648166
## [33,] -0.0432431996  0.013813736  2.640472e-02 -0.024962518 -0.0607946031
##              [,11]        [,12]         [,13]         [,14]         [,15]
##  [1,] -0.712211490 -0.814891696 -4.627850e-01 -0.4221532643  4.964411e-01
##  [2,]  0.003752558 -0.163875610  1.697819e-01  0.1219822839  1.495462e-01
##  [3,] -0.395516247 -0.134433702 -1.326848e-01 -0.3073993623  4.475738e-02
##  [4,] -0.053086538  0.028254971 -7.681192e-02 -0.3050606847 -2.329542e-02
##  [5,] -0.176918015  0.217227578  1.397067e-01  0.2211650312 -1.381306e-01
##  [6,] -0.118044302  0.042316027  2.703551e-02  0.0583707243  1.227634e-01
##  [7,]  0.216732129  0.020869369 -1.027057e-01  0.1557499021 -9.008349e-02
##  [8,]  0.173784196 -0.061088718 -2.553344e-01 -0.0295583904  3.459723e-03
##  [9,]  0.018508460 -0.175199211  1.421986e-01 -0.2569884360  1.214819e-01
## [10,]  0.082435526  0.141205803 -2.661248e-02  0.1158676893  4.850834e-02
## [11,]  0.141190588  0.029337367 -1.745256e-01 -0.0171205103 -6.709188e-02
## [12,]  0.318803042  0.027894434 -2.302154e-02  0.1334467083  1.858229e-02
## [13,] -0.126403242 -0.097423606  1.249164e-02 -0.0165321343 -1.633328e-01
## [14,]  0.363193780 -0.109778985 -1.486473e-02  0.0500467047 -9.483209e-03
## [15,]  0.006183926 -0.049541455  7.725440e-03 -0.1349606216 -1.938693e-02
## [16,] -0.134588003 -0.065045215 -6.500528e-02 -0.0876507685  2.208129e-02
## [17,]  0.125324413  0.129353851 -2.383952e-01  0.1436266154 -8.790759e-02
## [18,] -0.068014719  0.074475139 -3.262171e-02 -0.0430050902 -3.569449e-02
## [19,] -0.175914660  0.145224795  9.274055e-02  0.0898520648  1.324165e-01
## [20,] -0.133582249 -0.038385905 -5.717372e-02  0.0268184878 -1.583815e-02
## [21,]  0.083862863 -0.112628222  9.954699e-02 -0.1008881330 -7.552805e-02
## [22,]  0.113533318  0.286359876  3.438710e-02  0.0653542578 -4.875514e-02
## [23,] -0.069520079  0.079893745  4.156201e-02 -0.0070015914  8.301085e-03
## [24,]  0.055608947  0.052050248 -2.159952e-02  0.0090787429  1.118529e-02
## [25,] -0.103429146 -0.051067084  2.390502e-02 -0.0452804379  6.882745e-03
## [26,] -0.154529229  0.076968022  1.548869e-01 -0.0234221518  1.418374e-01
## [27,] -0.080818892 -0.238186851 -2.097901e-01 -0.1325931698  4.622724e-01
## [28,] -0.049345657 -0.068211168 -2.736917e-02  0.0247267243  3.361655e-02
## [29,]  0.004388327 -0.002416073 -2.716474e-03  0.6005147099  4.814963e-03
## [30,]  0.159975827 -0.011521813 -2.145228e-02  0.0624429435 -8.652613e-02
## [31,] -0.071347289  0.063049249 -6.296512e-02  0.0312637240  5.577638e-03
## [32,] -0.081594542  0.096100681  1.897288e-02  0.0057865889  1.770914e-02
## [33,]  0.024433728  0.013047420  6.315423e-02  0.0295099635  9.849215e-03
##              [,16]         [,17]         [,18]         [,19]        [,20]
##  [1,]  0.729519248 -0.4654282033  0.6145101786 -5.230303e-01  0.404629558
##  [2,] -0.004454987  0.0532902069 -0.2886953056 -1.309147e-01 -0.065638825
##  [3,]  0.173774227 -0.1321456581  0.0710461140 -3.485890e-03 -0.034064066
##  [4,]  0.145125508 -0.1716645509  0.1654713005  2.057486e-01 -0.029826207
##  [5,] -0.003747611  0.1350643933  0.0993557647 -2.103981e-02  0.085773699
##  [6,]  0.134351239 -0.0820732787 -0.1958646923 -8.698031e-02  0.102243073
##  [7,] -0.169470713 -0.1753619909 -0.1263721138  7.481533e-02  0.167956844
##  [8,] -0.109498024  0.2787225246  0.1476476640  8.098894e-02  0.054857537
##  [9,]  0.062124364 -0.0796455517 -0.0070934203  1.331812e-01 -0.238949537
## [10,]  0.208020911 -0.2221402228  0.0194950998 -9.073591e-03 -0.037457652
## [11,] -0.075304605 -0.0308459233  0.1307676584  8.391162e-02  0.174672693
## [12,] -0.016913688 -0.0390206575 -0.0773258507 -9.717230e-02 -0.165651381
## [13,] -0.058870152  0.1769033968  0.1474724859 -2.443802e-01 -0.085037738
## [14,] -0.057192788 -0.0006478874  0.0270918347 -2.848697e-02  0.017888863
## [15,]  0.111433178  0.0412853882  0.1174396500  1.713726e-02 -0.015405086
## [16,]  0.030472958  0.0248477180 -0.0391061865 -1.172271e-02  0.187212467
## [17,] -0.036773186  0.1739047021  0.2563164532 -4.634838e-02 -0.013424500
## [18,]  0.054487642 -0.1516308039 -0.0713790730  1.363569e-01 -0.193462729
## [19,] -0.009195495  0.0108222049  0.0386067927  8.010945e-02  0.264992446
## [20,]  0.012713622 -0.0024407080 -0.0201236960  3.243012e-02 -0.125032574
## [21,] -0.013368972  0.0958308280 -0.0161917042  5.893053e-02  0.013056211
## [22,]  0.116157219 -0.0707323328  0.0534757227 -1.633515e-01  0.046397094
## [23,] -0.021190075 -0.0571015254  0.0270981286 -2.595115e-02 -0.039416734
## [24,]  0.066619299  0.0138028972  0.0588065647 -3.545853e-02  0.041930404
## [25,]  0.013491044  0.0224835705 -0.0332784057 -2.427808e-02 -0.038671494
## [26,] -0.179629818 -0.1128255203  0.0715885758 -3.118518e-02 -0.078097954
## [27,]  0.279059350 -0.6783709526  0.2832680941 -4.470558e-01  0.651077688
## [28,]  0.002812791  0.0023631756  0.0107442439 -3.474668e-02  0.044437643
## [29,]  0.555992782 -0.0300672445  0.4324532449 -3.584648e-02 -0.421870410
## [30,] -0.047303200  0.0584600084  0.0606797189  8.731254e-02  0.006997220
## [31,]  0.002551844  0.0417271927 -0.0005584795  4.409665e-03 -0.082695089
## [32,] -0.016753154 -0.0642376095 -0.0155083248 -5.260266e-02 -0.107592255
## [33,]  0.028681222 -0.0017997429 -0.0130134020 -3.148418e-03 -0.030467739
##               [,21]        [,22]         [,23]         [,24]         [,25]
##  [1,] -0.3680885136 -0.882711530  0.3355892599  0.4466132522 -0.6353719831
##  [2,]  0.0624390021  0.004909080  0.1503461301 -0.0781597272 -0.1896225661
##  [3,]  0.0559851378 -0.024810567  0.0572828688  0.0461744368  0.2619299293
##  [4,] -0.2254979610  0.183361381  0.0463441350  0.0584993586  0.2573947608
##  [5,] -0.1350592375 -0.087268651  0.0175623465 -0.0704893544 -0.0831288770
##  [6,]  0.4114254117 -0.212691322 -0.0791730583 -0.0579334423  0.2607204616
##  [7,]  0.0664711446 -0.343663871 -0.0485427082 -0.0594763421 -0.2685116529
##  [8,]  0.0747417137 -0.027526913 -0.1835471094  0.0626802146 -0.1626054049
##  [9,] -0.1591280997 -0.207008615  0.0937459543  0.0849606395  0.0113023007
## [10,] -0.3139918149 -0.013878560 -0.0898885429  0.1633947641 -0.0541496873
## [11,] -0.1042657048 -0.030535419 -0.0858046040  0.0965495780  0.0624740906
## [12,]  0.1701129079  0.351063460  0.0890370384  0.0040065814 -0.0632404089
## [13,]  0.0456270128 -0.101292893 -0.0614905544 -0.1011991799  0.0405284464
## [14,]  0.1203461811 -0.103685878 -0.0313399024 -0.1330443770  0.0897198692
## [15,]  0.1987297088  0.109246142  0.0617827810 -0.0083231479  0.2270543426
## [16,]  0.0263581257 -0.018610211  0.3136822581 -0.0930772647  0.1616401076
## [17,]  0.0107498039 -0.128670648 -0.1241332442 -0.2704400420 -0.0477555208
## [18,] -0.2602904141 -0.028312622 -0.0552988015  0.2057229131 -0.0640010610
## [19,]  0.1337099373 -0.247681186 -0.0206083953  0.1539038718  0.1677477658
## [20,]  0.0990961641 -0.083855487 -0.0913274512 -0.0056753457 -0.0714150220
## [21,] -0.0254487991  0.023942169 -0.0749411359  0.0382329486  0.0080905873
## [22,] -0.0290326513  0.078796923  0.1545740068 -0.2348472476 -0.1423379183
## [23,] -0.0195809733  0.060195632  0.0340739228  0.0192825738 -0.0762929171
## [24,]  0.0311685856 -0.028782271  0.0416885912  0.0239278264 -0.0604248084
## [25,]  0.0032830751 -0.084785134  0.0056296163 -0.0001479712 -0.0143211288
## [26,]  0.1091492772  0.013628869 -0.0410011485  0.1119457558  0.0175196454
## [27,] -0.2237438709 -0.109332383  0.7220876813  0.4136667252 -0.6096450090
## [28,]  0.0237390697 -0.017343374  0.0502511784  0.0266055129 -0.0091913538
## [29,]  0.5266930461 -0.008614014  0.0288121570 -0.4168978631 -0.3681140542
## [30,]  0.0532448329 -0.028833941 -0.1195237041 -0.0558011681 -0.1274895668
## [31,] -0.0333146304 -0.246807039  0.0234096237  0.0342662632 -0.0878212303
## [32,]  0.0132465381  0.001084760 -0.0313191786 -0.0701729953  0.0241898596
## [33,] -0.0145956855  0.039887041 -0.0225421954  0.0058204238  0.0057638520
##              [,26]         [,27]         [,28]        [,29]        [,30]
##  [1,]  0.786992133 -0.4199651182 -0.6453723907  0.413181275 -0.592337310
##  [2,]  0.023518993  0.1333266646  0.0062887268 -0.286748946 -0.226348698
##  [3,] -0.140591189 -0.0559961386 -0.0637828335 -0.236449122 -0.009940902
##  [4,] -0.179455787 -0.0763119981 -0.1757344753  0.059410382  0.201968879
##  [5,]  0.065350711 -0.1417763680  0.1418754160 -0.114062257  0.035366453
##  [6,]  0.239979133 -0.0471462272 -0.0898081586 -0.073613748 -0.102842510
##  [7,] -0.155579731 -0.1221001372  0.1170393452 -0.122735851  0.180331156
##  [8,] -0.058924451 -0.0088449009  0.1321617365  0.208066896 -0.181331381
##  [9,] -0.225022912 -0.1813999116 -0.0628715381 -0.169594198  0.152810901
## [10,]  0.081566341 -0.0032833014 -0.1415512562  0.119351767  0.107442714
## [11,] -0.215007395  0.3074276745 -0.0036737623 -0.105892494  0.156690538
## [12,] -0.144472018  0.0791129619  0.1493418068  0.020660473 -0.149570882
## [13,]  0.343043655 -0.0573783070 -0.0349638350 -0.159750924 -0.020763678
## [14,]  0.022276012 -0.0147032123 -0.0149039906  0.110731550 -0.039397981
## [15,]  0.004534894  0.0813782662 -0.0871411487 -0.180779532 -0.147106886
## [16,] -0.001338653  0.0314606689 -0.0299415495 -0.339367270  0.055473525
## [17,]  0.154076636  0.0083227297 -0.0878422335 -0.212868705  0.027361661
## [18,]  0.087198228  0.0803213418  0.0289936457  0.234723255  0.115321316
## [19,] -0.125790656 -0.1565635204  0.0573705919  0.080314465 -0.205859244
## [20,]  0.015107184  0.1255112886  0.0411323011 -0.014875833  0.040497981
## [21,]  0.023579726  0.1095345318 -0.0751808435 -0.034047056 -0.098379388
## [22,] -0.020787979 -0.0062152846  0.1003907770  0.203302294  0.169440329
## [23,] -0.003423488 -0.0753919184  0.0107697165  0.083153233 -0.008059621
## [24,]  0.044162363 -0.0118014663 -0.0397045836 -0.012474179  0.001924350
## [25,] -0.010680303  0.0138532100  0.0516060814 -0.092330292  0.015218947
## [26,] -0.166870520  0.0170757025 -0.0686621740  0.092716753  0.104306147
## [27,]  0.150313973 -0.2784928083 -0.2209264040  0.200270861 -0.399393409
## [28,]  0.034770533  0.0100712227 -0.0161639899 -0.049843185  0.011308619
## [29,]  0.018338023  0.8713814616 -0.0079655489 -0.499762684  0.012017766
## [30,]  0.007809259 -0.0065824399  0.0303290002  0.046939999  0.050758846
## [31,]  0.019565228 -0.0268781278 -0.0151643241  0.010443625  0.011564441
## [32,] -0.012393128  0.0232871454  0.0016280233  0.025043914 -0.031051833
## [33,] -0.004652527  0.0222678464 -0.0041528922 -0.010910623 -0.093671434
##  [ reached getOption("max.print") -- omitted 24 rows ]
## 
## [[2]]
##  [1] -0.282026708  0.086308412  0.133914471 -0.001192288  0.103316717  0.079597764
##  [7] -0.009325965 -0.510441244  0.547421157  0.319709688 -0.043573927  0.246097714
## [13] -0.056712832 -0.369696021 -0.222868904 -0.339687854 -0.105103947 -0.243444875
## [19]  0.320311934  0.258757710 -0.455855787  0.249252781 -0.071390003  0.325902939
## [25]  0.126275852 -0.411053032 -0.609874368  0.347979546  0.326663256  0.264243215
## 
## [[3]]
##               [,1]          [,2]         [,3]        [,4]        [,5]         [,6]
##  [1,] -0.232501313  0.2928415239  0.140565038  0.17740002  0.10006279  0.066112690
##  [2,]  0.205525517  0.1418457329  0.009758664 -0.48773465  0.36523667 -0.218028933
##  [3,]  0.020741394  0.0304890051 -0.123063497  0.37984690 -0.25943899  0.392152399
##  [4,] -0.155277163 -0.0624808557  0.020432051  0.16997768  0.28997821  0.235081673
##  [5,]  0.159783885  0.0703252107 -0.162827387 -0.06890922  0.21150997 -0.032116160
##  [6,] -0.148045331  0.1159380972 -0.232500330  0.35666385 -0.30366972  0.160232663
##  [7,]  0.154707178  0.0291703604  0.218087405 -0.45685944  0.25949037  0.037612703
##  [8,]  0.196835354 -0.1299752146  0.045175385  0.08499885  0.46906418  0.088327512
##  [9,] -0.000859863  0.0507858098  0.418575317  0.22775583 -0.24715468 -0.028326135
## [10,] -0.082640402 -0.1334992349 -0.019941386 -0.05306740 -0.05850421 -0.250088423
## [11,]  0.218846887  0.0056977342  0.049929272 -0.22007853 -0.04331355  0.096167661
## [12,] -0.112917952 -0.1980956942 -0.296761215 -0.21937050  0.34707263  0.251576781
## [13,]  0.071033612 -0.0838846564 -0.169516370  0.19255896  0.41633186  0.078964733
## [14,]  0.131340623 -0.0873627216 -0.426907361 -0.05536006  0.19281033  0.226786420
## [15,] -0.140042052  0.2809669375  0.045318451  0.13574021  0.11916186  0.317792147
## [16,] -0.058679663  0.4991959035  0.313833565  0.12535290 -0.34603131 -0.012346624
## [17,]  0.068266161  0.0527788028 -0.146560714  0.12513834  0.31015033 -0.395230949
## [18,] -0.282295495  0.1149619743  0.043742247  0.19977170 -0.11052866  0.089333013
## [19,]  0.001007761 -0.3222911954 -0.141588986 -0.08123756  0.01618466 -0.346295059
## [20,] -0.516050577 -0.0778190047  0.083206594 -0.27252418  0.04854503  0.428480864
## [21,]  0.175282553 -0.0409848653  0.164469555 -0.28640541  0.19380839  0.167012900
## [22,]  0.283727527 -0.1300850660 -0.395898968  0.26406237  0.12497630  0.223059118
## [23,] -0.304906577  0.0002150437  0.066671893  0.09744377  0.02524569  0.516375661
## [24,] -0.066698149  0.1708828509 -0.289151698  0.16707940 -0.35984218  0.020823417
## [25,] -0.115078464 -0.0027103587  0.019944256 -0.12949750  0.24461739 -0.188461855
## [26,] -0.166105106  0.2796464264  0.300099224 -0.24221039 -0.11642620 -0.181874424
## [27,] -0.037257303 -0.0642147139  0.059338130  0.23555154  0.22129133 -0.231831908
## [28,] -0.341551542 -0.7165807486 -0.133359149  0.16862960  0.13081951  0.004461672
## [29,]  0.061354119 -0.0723449141 -0.196549684  0.18271901 -0.09029669 -0.146544859
## [30,]  0.057799038 -0.1444665939 -0.127835959  0.16654998 -0.01204011 -0.271814734
##              [,7]        [,8]        [,9]        [,10]         [,11]        [,12]
##  [1,]  0.21311715  0.01846009 -0.11757247 -0.243426323 -0.3862234652 -0.118649416
##  [2,] -0.23126714 -0.16383663  0.11887081  0.197523654 -0.0588766374  0.002542035
##  [3,] -0.14133099  0.09135992 -0.16837624 -0.148576155 -0.0887190849 -0.014054355
##  [4,]  0.31898504 -0.17690499 -0.43045002  0.381945610  0.2868536711  0.098214343
##  [5,] -0.03951347 -0.11831224 -0.30231673  0.383829623 -0.0738805085  0.059082240
##  [6,] -0.32521757 -0.20907737 -0.40785715  0.025166268  0.1014736965 -0.346234292
##  [7,]  0.03041033  0.23221712 -0.08099909  0.003636565 -0.0430878848  0.166518599
##  [8,]  0.36966339  0.47302607  0.02248991  0.008880679  0.1063629240 -0.040956013
##  [9,]  0.09972992 -0.51671368  0.02210282 -0.148625836 -0.5440436006  0.204965234
## [10,] -0.18252677 -0.33520475  0.08321787 -0.248535186 -0.3225192726  0.139201820
## [11,]  0.15886034 -0.04634874 -0.32505721  0.184814841  0.1771183312 -0.032010544
## [12,] -0.08538136 -0.26715699 -0.37737936  0.089217551  0.2128878087  0.098186992
## [13,] -0.04257543  0.13215458 -0.21896927 -0.189680472  0.2404793352  0.432078153
## [14,]  0.33146599  0.39867631 -0.05683200 -0.021324966  0.1083126441 -0.046648916
## [15,]  0.12385551 -0.04911806 -0.23281147 -0.451307118 -0.3546159267 -0.428412825
## [16,]  0.16581391  0.12996516  0.12464382  0.366083175 -0.0534083582 -0.058116075
## [17,] -0.02808530  0.01407588  0.11175124 -0.016355790 -0.0785713792  0.086118408
## [18,] -0.13918613 -0.28295180  0.29532924  0.104088269  0.2270492762 -0.094929419
## [19,]  0.05106626  0.25002211  0.13070023 -0.126312330 -0.0589390993  0.286906868
## [20,] -0.23301166  0.00126946 -0.43899667  0.106028162 -0.3812254667 -0.251451463
## [21,]  0.28314322  0.21671551  0.02784175  0.009237651  0.5404528379 -0.235623732
## [22,]  0.19582392  0.03794044 -0.47519511  0.133860067 -0.1277690679  0.147167236
## [23,] -0.26037890 -0.17749214 -0.22940615  0.199379802 -0.0269921292 -0.294040829
## [24,] -0.34782976  0.10589155  0.21345676 -0.161668837 -0.1334689558  0.002756604
## [25,] -0.04485632  0.07684859 -0.01600491  0.048689399  0.0008669847  0.248176157
## [26,] -0.08734338  0.02546140  0.54945630 -0.234910369  0.0205506999 -0.153706104
## [27,]  0.14280492  0.64715296 -0.24673127 -0.018834081  0.5944663882 -0.010605807
## [28,]  0.14914832  0.16687293 -0.15919809 -0.301595658  0.1982628405  0.360702783
## [29,] -0.37942412 -0.08501843  0.18151483 -0.230215773 -0.3724642098  0.159492224
## [30,]  0.25246176  0.10979345 -0.09463172  0.389581740 -0.2762593627 -0.181715548
##             [,13]        [,14]       [,15]       [,16]
##  [1,]  0.23845367  0.391559780  0.37299955 -0.04766696
##  [2,] -0.27139577 -0.104887053  0.21994556  0.09419525
##  [3,] -0.39177430 -0.241600037  0.15973774  0.01045218
##  [4,] -0.03676064 -0.396023393 -0.43245903  0.10610139
##  [5,] -0.23092407 -0.099788874 -0.07021088  0.04591432
##  [6,] -0.10818826  0.035178673  0.16942944  0.11824833
##  [7,] -0.28309378  0.024497485 -0.05137254  0.24311860
##  [8,]  0.06910275 -0.381036431  0.10132520  0.04928410
##  [9,] -0.09075980  0.357842714 -0.12500729 -0.37084690
## [10,]  0.35088387  0.404264748  0.25987670  0.23078181
## [11,]  0.02226515  0.327820390 -0.27323741  0.18857399
## [12,]  0.10886248 -0.270791233 -0.23130977 -0.24709608
## [13,] -0.03463710 -0.252579361 -0.07026480 -0.31591323
## [14,] -0.16237760  0.131310210  0.07766928 -0.23603871
## [15,]  0.22639863  0.001549316  0.23534317  0.02917634
## [16,]  0.30821735  0.099688768 -0.20200522 -0.14240889
## [17,]  0.12074252 -0.326333433  0.15549643 -0.22406387
## [18,]  0.35737488  0.139834136  0.17556410 -0.21544121
## [19,] -0.09059615 -0.069962151 -0.40033683 -0.28426585
## [20,]  0.09648057  0.017393710  0.06967809  0.25287643
## [21,] -0.21507838 -0.203084618  0.37212044 -0.32377043
## [22,] -0.14056782 -0.080417655 -0.21311249 -0.53956467
## [23,] -0.14297482 -0.129229724 -0.12896508  0.11350271
## [24,] -0.16924819  0.202244282 -0.21844734 -0.04844424
## [25,] -0.30253014 -0.312072754  0.42593831 -0.02449828
## [26,]  0.35514465  0.291752279  0.42608583 -0.16911922
## [27,]  0.01893734  0.256303757  0.15037212 -0.12385029
## [28,] -0.22626632 -0.231029078 -0.36930919 -0.01774714
## [29,]  0.04229600  0.169606328  0.27788422 -0.02444237
## [30,] -0.41787338  0.012206912 -0.20642003 -0.11539765
## 
## [[4]]
##  [1] -0.159209728  0.146082073  0.101375431  0.165886909  0.055261008  0.344417363
##  [7]  0.064184308 -0.080298379  0.056914642  0.028675580 -0.097928695  0.003217116
## [13]  0.116863646  0.156676501  0.143953085  0.114630781
## 
## [[5]]
##               [,1]         [,2]        [,3]         [,4]        [,5]        [,6]
##  [1,] -0.152282700  0.038523350  0.38653836 -0.380773634 -0.15481828 -0.26896128
##  [2,] -0.002409261 -0.300699145 -0.25566009 -0.054319087 -0.15619576  0.29573238
##  [3,] -0.061251935 -0.525169313 -0.41466665 -0.167260155  0.26350382 -0.16835609
##  [4,] -0.203771964 -0.235662818  0.03283354  0.397095561  0.10239325  0.13889283
##  [5,]  0.288018912  0.202324927  0.29104206 -0.004043519 -0.27187756 -0.10990795
##  [6,] -0.143009201  0.003154357 -0.46749309 -0.052855950  0.30990499  0.32147878
##  [7,] -0.253284097  0.196982473  0.13386874  0.430480480  0.22529909 -0.16578899
##  [8,]  0.038224529  0.114053115  0.04370245  0.087276757 -0.11211437  0.35493433
##  [9,] -0.060944010  0.017792344 -0.22335069  0.302471220  0.42281753  0.47397637
## [10,]  0.026804868  0.054076541 -0.16936885 -0.082224429 -0.16018406  0.09136956
## [11,]  0.106467925  0.060271613  0.36786208  0.171622172 -0.02737989 -0.05425362
## [12,] -0.066822834 -0.414545357 -0.09838393  0.058653105 -0.07321890 -0.06187512
## [13,]  0.005708187  0.136617437  0.12932493 -0.189623669 -0.30577087  0.29000598
## [14,] -0.089629099 -0.082457550 -0.14258800  0.362704724  0.23863995  0.20772406
## [15,]  0.029941015  0.077753358  0.06202007  0.041037809  0.27838171  0.09378575
## [16,] -0.175240129 -0.184329063  0.04960376  0.156399786 -0.13394234  0.38875133
##              [,7]        [,8]         [,9]       [,10]       [,11]         [,12]
##  [1,]  0.18644619 -0.19281806 -0.143334940 -0.43504941 -0.31829938 -0.0006827569
##  [2,] -0.09701722  0.39278978  0.390160322 -0.14255829 -0.32233205 -0.5382270813
##  [3,] -0.42291000  0.34145388  0.346383959  0.10115951  0.07644945  0.0558606833
##  [4,] -0.19441891 -0.01399509 -0.633135498  0.42795143 -0.41413337  0.0103627946
##  [5,]  0.36957794  0.38144821 -0.828845680 -0.16913752 -0.21980137 -0.1048631519
##  [6,] -0.03937756  0.34377825 -0.124571137 -0.15839210  0.19164470 -0.1110875979
##  [7,] -0.07707267  0.16724129 -0.350151658  0.08078595  0.32952553 -0.0707903057
##  [8,]  0.31005588 -0.29557404  0.002799409  0.05246624 -0.49288648  0.3881639242
##  [9,]  0.06023601 -0.23717369 -0.315916836 -0.02953328  0.27160171 -0.3752539754
## [10,] -0.12521099 -0.03396337  0.031071853 -0.26151145  0.16427286  0.2185252756
## [11,]  0.36634079  0.25566199 -0.422173470 -0.29623577  0.37052202  0.3485530913
## [12,] -0.53180194 -0.04759547 -0.184383258 -0.15032518  0.24546383 -0.1006838754
## [13,]  0.18483546 -0.16181123 -0.200188935 -0.14755033 -0.43550226 -0.2975659072
## [14,] -0.14455917 -0.10872443 -0.434421778 -0.11542961 -0.30341613 -0.2038914859
## [15,]  0.12473521  0.21254829  0.126445428 -0.27438286  0.29574955 -0.0166060813
## [16,]  0.08147255  0.19752879  0.116355114 -0.07993130  0.02051583 -0.4224003851
##             [,13]         [,14]        [,15]       [,16]
##  [1,]  0.38634279 -0.0007085439 -0.156274021  0.26565817
##  [2,] -0.40488371 -0.3433353007  0.457166672  0.13971929
##  [3,]  0.01410800  0.1783063263 -0.296983391  0.51636040
##  [4,] -0.09257359  0.0875857174  0.393532515  0.09416457
##  [5,] -0.28497899  0.3067537844 -0.132819816  0.02839280
##  [6,] -0.28649375  0.1775925159 -0.165738568  0.32616076
##  [7,]  0.37432221 -0.1544353068 -0.356974065 -0.22735108
##  [8,]  0.26928824  0.4040215611  0.269536018 -0.13866560
##  [9,] -0.22109330  0.1480466127 -0.079814270 -0.05406047
## [10,]  0.25025889  0.3707785904 -0.009046328 -0.02022530
## [11,]  0.05004594  0.2267424762  0.128830954  0.34827989
## [12,] -0.23982549  0.1706580520 -0.013123713  0.09674814
## [13,]  0.08120941 -0.1147394851  0.360345364  0.44118407
## [14,]  0.08309616  0.3190802932  0.206942201  0.46079457
## [15,] -0.26101470 -0.0420643725 -0.168586209  0.21677449
## [16,] -0.12710108  0.3472642601  0.080618002  0.41166443
## 
## [[6]]
##  [1] -0.21097274 -0.02019890 -0.03659583 -0.11521966  0.11454046  0.19045204
##  [7] -0.37585503  0.10573452 -0.15516104  0.13904567 -0.07318669 -0.20604302
## [13] -0.07853336 -0.06871726  0.14789663  0.11962701
## 
## [[7]]
##               [,1]
##  [1,] -0.073339120
##  [2,] -0.249016926
##  [3,] -0.395099044
##  [4,] -0.305241108
##  [5,]  0.524895310
##  [6,]  0.439581573
##  [7,] -0.427478611
##  [8,]  0.026368108
##  [9,] -0.002637332
## [10,]  0.208679855
## [11,] -0.105449475
## [12,] -0.196138307
## [13,] -0.087272167
## [14,] -0.408085197
## [15,]  0.320306331
## [16,]  0.240999863
## 
## [[8]]
## [1] 0.1093302
(results <- yelp_nn %>% evaluate(x_test, y_test))
##      loss  accuracy 
## 0.4626670 0.7723907

Foi adicionada uma camada de dropout na rede neural, para diminuir o overfit do modelo. Observa-se que funcionou, pois a perda da base de validação não ultrappassa a perda da base de treino.

Desempenho do modelo

tibble(observado = factor(y_test)) %>% 
  bind_cols(data.frame(prob = predict(yelp_nn, as.matrix(x_test)))) %>% 
  roc_auc(observado, prob)
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 roc_auc binary         0.837
tibble(observado = factor(y_test)) %>% 
  bind_cols(data.frame(prob = predict(yelp_nn, as.matrix(x_test)))) %>% 
  roc_curve(observado, prob) %>% 
  autoplot()

Pelo gráfico, observa-se que o modelo atingiu um desempenho bom na base de teste.

Recomendação

Usuário criado

Criação de um usuário e definição de seu cluster

glimpse(yelp_usr_cluster)
## Rows: 119,792
## Columns: 24
## $ user_id            <chr> "-4Anvj46CWf57KWI9UQDLg", "-BUamlG3H-7yqpAl1p-msw", "-C…
## $ average_stars      <dbl> 3.50, 1.50, 3.00, 3.56, 3.00, 4.00, 4.17, 3.57, 4.48, 4…
## $ compliment_cool    <dbl> 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_cute    <dbl> 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_funny   <dbl> 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_hot     <dbl> 0, 0, 0, 0, 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, …
## $ compliment_list    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_more    <dbl> 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_note    <dbl> 0, 0, 1, 0, 0, 0, 0, 16, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ compliment_photos  <dbl> 0, 0, 0, 0, 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_plain   <dbl> 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, …
## $ compliment_profile <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_writer  <dbl> 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cool               <dbl> 2, 0, 1, 0, 1, 0, 0, 1562, 2, 1, 1, 9, 0, 5, 0, 9, 0, 0…
## $ elite_count        <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ fans               <dbl> 1, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ friends_count      <dbl> 1, 16, 15, 27, 1, 1, 1, 338, 59, 6, 10, 100, 8, 1, 79, …
## $ funny              <dbl> 0, 0, 1, 0, 0, 0, 0, 1266, 3, 1, 4, 0, 1, 1, 1, 5, 0, 0…
## $ review_count_usr   <dbl> 2, 2, 4, 27, 2, 6, 6, 66, 28, 3, 8, 37, 4, 20, 1, 18, 5…
## $ useful             <dbl> 2, 0, 1, 5, 1, 3, 16, 1683, 12, 1, 2, 30, 4, 30, 0, 19,…
## $ year_since         <dbl> 2016, 2016, 2011, 2019, 2014, 2017, 2014, 2019, 2014, 2…
## $ tips_counter       <dbl> 0, 1, 0, 0, 0, 1, 0, 0, 0, 19, 0, 0, 0, 0, 0, 2, 0, 0, …
## $ total_compliments  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cluster_usr        <int> 3, 7, 10, 9, 3, 9, 4, 9, 4, 10, 3, 10, 4, 3, 7, 9, 3, 1…
rm(user)

compliment_max <- 50

user <- tibble(user_id = 'random_user',
               average_stars = round(runif(1, 1.0, 5),2),
               compliment_cool = ceiling(runif(1,0, compliment_max)),
               compliment_cute = ceiling(runif(1,0, compliment_max)),
               compliment_funny = ceiling(runif(1,0, compliment_max)),
               compliment_hot  = ceiling(runif(1,0, compliment_max)),
               compliment_list = ceiling(runif(1,0, compliment_max)),
               compliment_more = ceiling(runif(1,0, compliment_max)),
               compliment_note = ceiling(runif(1,0, compliment_max)),
               compliment_photos = ceiling(runif(1,0, compliment_max)),
               compliment_plain = ceiling(runif(1,0, compliment_max)),
               compliment_profile = ceiling(runif(1,0, compliment_max)),
               compliment_writer = ceiling(runif(1,0, compliment_max)),
               cool = ceiling(runif(1,0, compliment_max)),
               elite_count = 0,
               fans = ceiling(runif(1,0, compliment_max)),
               friends_count = ceiling(runif(1,0, compliment_max)),
               funny = ceiling(runif(1,0, compliment_max)),
               review_count_usr = ceiling(runif(1,0,compliment_max)),
               useful = ceiling(runif(1,0, compliment_max)),
               year_since = ceiling(runif(1,2004, 2019)),
               tips_counter = ceiling(runif(1,0, compliment_max)),
               total_compliments = ceiling(runif(1,0, compliment_max))
                )

## criação aleatória do número de anos que o usuário foi elite
user$elite_count <- ceiling(runif(1,0, (2020-user$year_since)))

#encontra o número do cluster em que o usuário se encaixa
user$cluster_usr <- user_cluster_tree %>%
      predict(user) %>% 
      ceiling()
  
glimpse(user)
## Rows: 1
## Columns: 24
## $ user_id            <chr> "random_user"
## $ average_stars      <dbl> 4.07
## $ compliment_cool    <dbl> 14
## $ compliment_cute    <dbl> 17
## $ compliment_funny   <dbl> 2
## $ compliment_hot     <dbl> 14
## $ compliment_list    <dbl> 13
## $ compliment_more    <dbl> 19
## $ compliment_note    <dbl> 37
## $ compliment_photos  <dbl> 49
## $ compliment_plain   <dbl> 46
## $ compliment_profile <dbl> 31
## $ compliment_writer  <dbl> 24
## $ cool               <dbl> 1
## $ elite_count        <dbl> 7
## $ fans               <dbl> 21
## $ friends_count      <dbl> 25
## $ funny              <dbl> 7
## $ review_count_usr   <dbl> 16
## $ useful             <dbl> 32
## $ year_since         <dbl> 2010
## $ tips_counter       <dbl> 15
## $ total_compliments  <dbl> 24
## $ cluster_usr        <dbl> 8
# seleção aleatória de estabelecimentos e notas atribuídas a cada um baseado no número de reviews

n_reviews <- user$review_count_usr

reviewed_usr <- tibble(business_id = sample(yelp_bz_raw$business_id, n_reviews), #seleçao aleatória de estabelecimentos
                                            stars_rv = ceiling(runif(n_reviews, 1.0, 5)),
                                            year_rv = ceiling(runif(n_reviews, 2009, 2019)),
                                            )
user_hist <- user %>% 
            bind_rows(replicate(n_reviews-1, user, simplify = FALSE)) %>% #replica as informações do usuário
            bind_cols(reviewed_usr) %>% #junta os estabelecimentos e notas dadas
            left_join(., yelp_bz_raw, by = 'business_id') #junta as informações dos estabelecimentos

glimpse(user_hist)
## Rows: 16
## Columns: 63
## $ user_id                    <chr> "random_user", "random_user", "random_user", "r…
## $ average_stars              <dbl> 4.07, 4.07, 4.07, 4.07, 4.07, 4.07, 4.07, 4.07,…
## $ compliment_cool            <dbl> 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,…
## $ compliment_cute            <dbl> 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,…
## $ compliment_funny           <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
## $ compliment_hot             <dbl> 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,…
## $ compliment_list            <dbl> 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,…
## $ compliment_more            <dbl> 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,…
## $ compliment_note            <dbl> 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,…
## $ compliment_photos          <dbl> 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,…
## $ compliment_plain           <dbl> 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,…
## $ compliment_profile         <dbl> 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,…
## $ compliment_writer          <dbl> 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,…
## $ cool                       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
## $ elite_count                <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
## $ fans                       <dbl> 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,…
## $ friends_count              <dbl> 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,…
## $ funny                      <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
## $ review_count_usr           <dbl> 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,…
## $ useful                     <dbl> 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,…
## $ year_since                 <dbl> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010,…
## $ tips_counter               <dbl> 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,…
## $ total_compliments          <dbl> 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,…
## $ cluster_usr                <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
## $ business_id                <chr> "r-kj-kBSKFKh0sM8EVX8AA", "H7rpWv02D6WTu6IpNNDk…
## $ stars_rv                   <dbl> 2, 4, 5, 4, 3, 5, 3, 3, 5, 5, 2, 5, 3, 5, 4, 3
## $ year_rv                    <dbl> 2016, 2011, 2016, 2018, 2019, 2014, 2011, 2011,…
## $ categories                 <chr> "Accessories, Women's Clothing, Men's Clothing,…
## $ latitude                   <dbl> 43.64127, 43.65943, 43.77336, 43.64869, 43.6659…
## $ longitude                  <dbl> -79.43377, -79.38252, -79.49302, -79.38544, -79…
## $ name                       <chr> "Frances Watson", "Bed Bath and Beyond", "Aji S…
## $ review_count               <dbl> 3, 28, 41, 3, 4, 4, 16, 11, 3, 42, 351, 44, 4, …
## $ stars                      <dbl> 5.0, 2.5, 3.0, 3.0, 4.0, 2.0, 4.0, 2.5, 2.5, 3.…
## $ AcceptsInsurance           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ AgesAllowed                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ Alcohol                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0
## $ BYOB                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ BikeParking                <dbl> 2, 2, 2, 0, 2, 0, 0, 2, 0, 2, 2, 2, 0, 0, 0, 0
## $ BusinessAcceptsCreditCards <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0
## $ ByAppointmentOnly          <dbl> 1, 0, 0, 0, 1, 0, 0, 2, 0, 1, 0, 0, 0, 1, 0, 1
## $ Caters                     <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0
## $ CoatCheck                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ Corkage                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ DogsAllowed                <dbl> 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ DriveThru                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ GoodForDancing             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ GoodForKids                <dbl> 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0
## $ HappyHour                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0
## $ HasTV                      <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0
## $ NoiseLevel                 <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0
## $ OutdoorSeating             <dbl> 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 1, 0
## $ RestaurantsAttire          <dbl> 0, 0, 1, 0, 1, 0, 3, 3, 0, 0, 1, 0, 0, 0, 0, 0
## $ RestaurantsDelivery        <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 1, 0
## $ RestaurantsGoodForGroups   <dbl> 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0
## $ RestaurantsPriceRange2     <dbl> 3, 2, 2, 0, 2, 0, 0, 3, 0, 1, 3, 2, 0, 0, 3, 0
## $ RestaurantsReservations    <dbl> 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 1, 0, 0, 0
## $ RestaurantsTableService    <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0
## $ RestaurantsTakeOut         <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 1, 0
## $ Smoking                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ WheelchairAccessible       <dbl> 1, 0, 0, 0, 2, 0, 2, 1, 0, 0, 0, 2, 0, 0, 0, 0
## $ WiFi                       <dbl> 0, 0, 1, 0, 1, 0, 3, 3, 0, 0, 1, 0, 0, 0, 0, 0
## $ tips_counter_bz            <dbl> 0, 6, 7, 0, 4, 2, 0, 0, 1, 1, 37, 3, 4, 0, 3, 0
## $ total_compliments_bz       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

Função para recomendação

recomm_f <- function(user, reviewed_usr){
  
  to_go <- yelp_raw %>% 
    filter(stars_rv >= 4) %>% 
    filter(cluster_usr == user$cluster_usr) %>% 
    #filter(cluster_usr == user$cluster) %>% 
    select(business_id) %>% 
    distinct() 
  
  n_go <- nrow(to_go)
  
  #filtra todos os estabelecimentos do cluster do usuário e junta as informações para modelagem
  to_review <- user %>% 
            bind_rows(replicate(n_go-1, user, simplify = FALSE)) %>% #replica as informações do usuário
            bind_cols(to_go) %>% #junta os estabelecimentos e notas dadas
            left_join(., yelp_bz_raw, by = 'business_id')
  
  #prepara a base para o modelo
  user_x_test <- to_review %>% 
          select_if(is.numeric) %>% 
          #select(-stars_rv) %>% 
          scale(center = mean, scale = std) %>% 
          as.matrix()

  #aplica a base no modelo
  predictions <- as_tibble(predict(yelp_nn, user_x_test))
  
  
  #seleciona as principais recomendações
  recommendation <- to_review %>% 
    bind_cols(pred = predictions) %>% 
    anti_join(., reviewed_usr, by = 'business_id') %>% 
    filter(V1 > 0.8)

}

Recomendação para usuário criado

Usuário aleatório da base

Para validar as recomendações, é feito o teste também com um usuário aleatório da base de teste.

n <- ceiling(runif(1,1,nrow(yelp_test)))


(random_user <- yelp_raw[n,]$user_id)
## [1] "UuH7pyPsm4E5bDfXaQJ9dg"
user <- yelp_usr_cluster %>% 
            filter(user_id == random_user)

reviewed_usr <- yelp_raw %>% 
  filter(user_id == random_user)

recommendation <- recomm_f(user,reviewed_usr)
## Warning: The `x` argument of `as_tibble.matrix()` must have column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

Recomendação para usuário da base

#top 5 recomendações

  recommendation %>% 
    #group_by(categories) %>% 
    top_n(5, V1) %>%
    arrange(-V1) %>% 
    #ungroup() %>%
    #mutate(name = reorder_within(name, V1)) %>%
    ggplot(aes(x = V1, y = name)) +
    geom_col() +
    #facet_wrap(~categories, scales = "free_y") +
    #scale_y_reordered() +
    labs(x = "Probabilidade de avaliação positiva",
    y = 'Recomendação')

Recomendação por categoria

#categorias

recommendation %>% 
  unnest_tokens(category, categories) %>% 
  filter(category %in% c('food', 'restaurants','bar','club','pub','pizza','pasta','italian')) %>% 
  group_by(category) %>%
  mutate(pred_avg = mean(V1)) %>% 
  ungroup() %>% 
  #arrange(-V1) %>% 
  unique() %>% 
  top_n(n = 5, wt = V1) %>%
  mutate(name = reorder_within(name, -V1, category)) %>%
  ggplot(aes(V1, name, fill = category)) +
  geom_col(show.legend = TRUE) +
  facet_wrap(~category, scales = 'free') +
  scale_x_continuous() +
  scale_y_reordered() +
  labs(x = 'Probabilidade de boa avaliação')

Referências